Objectives

The goal of this step in the workflow is to compare S2 stripchart stream height values to the stream height values recorded at manual checkpoints during the time period of interest. Manual checkpoints can be used to quality check stream height values being collected by the stripchart.

Set Up

  • Install/load necessary R packages

  • Recreate intermediate data sets for use in this .Rmd file:

    • Individual water years’ worth of stripchart data

    • All water years’ worth of stripchart data

#2017 
filepath <- "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/Hydro/Streamflow/L0_subdaily/StripCharts/AnnualBreakPoint/S2"

#read in the 2017 data 
wy2017 <- read_excel(path = here(filepath, "WY2017.S2_Breakpoint.xlsx"), 
                     skip =3) #skip the top 3 rows of the Excel Sheet 

#clean the 2017 data
wy2017_clean <- wy2017 %>% 
  #change column names
  rename(datetime = "Date/time",
         stream_height_ft = "Stage.ft") %>% 
  #remove unnecessary columns 
  select(c("datetime", "stream_height_ft")) %>% 
  #format column names into lower_snake_case
  clean_names() %>% 
  #create a date column 
  mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT")) 

#2018

filepath <- "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/Hydro/Streamflow/L0_subdaily/StripCharts/AnnualBreakPoint/S2"

#create a function to specify custom time intervals 
`%between%` <- function(x, interval) x >= interval[1] & x <= interval[2]

#read in the 2018 data 
wy2018 <- read_excel(path = here(filepath, "WY2018.S2_Breakpoint.xlsx"), 
                     skip =3)

#clean the 2018 data
wy2018_clean <- wy2018 %>% 
  rename(datetime = "Date/time",
         stream_height_ft = "Stage.ft") %>% 
  select(c("datetime", "stream_height_ft")) %>% 
  clean_names() %>% 
  mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT")) 

#assign October NA values 
wy2018_clean$stream_height_ft[wy2018_clean$date %between% as.Date(c('2018-10-09', '2018-10-10'))] <- NA
wy2018_clean$stream_height_ft[wy2018_clean$date %between% as.Date(c('2018-10-11', '2018-10-15'))] <- NA

#assign November NA values 
wy2018_clean$stream_height_ft[wy2018_clean$date %between% as.Date(c('2018-11-10', '2018-11-14'))] <- NA
wy2018_clean$stream_height_ft[wy2018_clean$date %between% as.Date(c('2018-11-25', '2018-12-04'))] <- NA

#assign December NA values 
wy2018_clean$stream_height_ft[wy2018_clean$date %between% as.Date(c('2018-12-01', '2018-12-03'))] <- NA
wy2018_clean$stream_height_ft[wy2018_clean$date %between% as.Date(c('2018-12-08', '2018-12-10'))] <- NA
wy2018_clean$stream_height_ft[wy2018_clean$date %between% as.Date(c('2018-12-11', '2018-12-31'))] <- NA

wy2018_altered <- wy2018_clean
wy2018_altered <- subset(wy2018_altered, !(wy2018_altered$date %between% as.Date(c('2018-10-31', '2018-11-01')) & wy2018_altered$stream_height_ft < 0.10))

filepath <- "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/Hydro/Streamflow/L0_subdaily/StripCharts/AnnualBreakPoint/S2"

#2019 
#read in the 2019 data 
wy2019 <- read_excel(path = here(filepath, "WY2019.S2_Breakpoint.xlsx"), 
                     skip =3)

#clean the 2019 data
wy2019_clean <- wy2019 %>% 
  rename(datetime = "Date/time",
         stream_height_ft = "Stage.ft") %>% 
  select(c("datetime", "stream_height_ft")) %>% 
  clean_names() %>% 
  mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT")) 

#assign February NA values 
wy2019_clean$stream_height_ft[wy2019_clean$date %between% as.Date(c('2019-04-01', '2019-04-02'))] <- NA

#assign March NA values 
wy2019_clean$stream_height_ft[wy2019_clean$date == as.Date('2019-03-14')] <- NA
wy2019_clean$stream_height_ft[wy2019_clean$date == as.Date('2019-03-18')] <- NA
wy2019_clean$stream_height_ft[wy2019_clean$date %between% as.Date(c('2019-03-30', '2019-03-31'))] <- NA

#assign April NA values 
wy2019_clean$stream_height_ft[wy2019_clean$date %between% as.Date(c('2019-04-01', '2019-04-02'))] <- NA
wy2019_clean$stream_height_ft[wy2019_clean$date %between% as.Date(c('2019-04-08', '2019-04-17'))] <- NA
wy2019_clean$stream_height_ft[wy2019_clean$date %between% as.Date(c('2019-04-17', '2019-04-22'))] <- NA

#assign September NA values 
wy2019_clean$stream_height_ft[wy2019_clean$date %between% as.Date(c('2019-09-03', '2019-09-10'))] <- NA

#2020 
filepath <- "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/Hydro/Streamflow/L0_subdaily/StripCharts/AnnualBreakPoint/S2"

#read in the 2020 data 
wy2020 <- read_excel(path = here(filepath, "WY2020.S2_Breakpoint.xlsx"), 
                     skip =3)

#clean the 2020 data
wy2020_clean <- wy2020 %>% 
  rename(datetime = "Date/time",
         stream_height_ft = "Stage.ft") %>% 
  select(c("datetime", "stream_height_ft")) %>% 
  clean_names() %>% 
  mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT")) 

#create date range to fill in with NA values 
start_date <- as.POSIXct("7/28/2020 13:40:00", 
                         format = '%m/%d/%Y %H:%M:%S',
                         tz = "GMT")
end_date <- as.POSIXct("8/11/2020 00:00:00", 
                         format = '%m/%d/%Y %H:%M:%S',
                         tz = "GMT")

#create a function to specify custom time intervals 
`%between%` <- function(x, interval) x >= interval[1] & x <= interval[2]

#assign February NA values 
wy2020_clean$stream_height_ft[wy2020_clean$datetime %between% c(start_date, end_date)] <- NA

#2021 
filepath <- "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/Hydro/Streamflow/L0_subdaily/StripCharts/AnnualBreakPoint/S2"

#read in the 2021 data 
wy2021 <- read_excel(path = here(filepath, "WY2021.S2_Breakpoint.xlsx"), 
                     skip =3)

#clean the 2021 data
wy2021_clean <- wy2021 %>% 
  rename(datetime = "Date/time",
         stream_height_ft = "Stage.ft") %>% 
  select(c("datetime", "stream_height_ft")) %>% 
  clean_names() %>% 
  mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT")) 

#identify date intervals that need to be filled w/NAs
date1 <- as.POSIXct("2021-03-04 00:00:00", 
                         format = '%Y-%m-%d %H:%M:%S',
                         tz = "GMT")
date2 <- as.POSIXct("2021-03-09 00:00:00", 
                         format = '%Y-%m-%d %H:%M:%S',
                         tz = "GMT")
date3 <- as.POSIXct("2021-11-11 23:59:00", 
                         format = '%Y-%m-%d %H:%M:%S',
                         tz = "GMT")
date4 <- as.POSIXct("2021-12-29 00:00:00", 
                         format = '%Y-%m-%d %H:%M:%S',
                         tz = "GMT")


#create a function to specify custom time intervals 
`%between%` <- function(x, interval) x >= interval[1] & x <= interval[2]

#assign March NA values
wy2021_clean$stream_height_ft[wy2021_clean$datetime %between% c(date1, date2)] <- NA

#assign Nov-Dec NA values 
wy2021_clean$stream_height_ft[wy2021_clean$datetime %between% c(date3, date4)] <- NA

#all water years 
all_streamflow <- rbind(wy2017_clean, 
                        wy2018_altered, 
                        wy2019_clean,
                        wy2020_clean,
                        wy2021_clean)
  • Set date range
min_year = 2017
max_year = 2021

Manual Streamflow Checkpoints (2017 - 2021)

  • Read in the manual streamflow checkpoints from the External-MEF_DATA Box folder
    • The raw data is found at the following file path: External-MEF_DATA/Hydro/Streamflow/L0_subdaily/ManualChecks
  • Clean and manipulate the data into a tidy format
    • Renaming columns to use lower_snake_case and avoid special characters via clean_names()
    • Convert the collected column from a character into a POSIXct date format
  • Subset the data to include only the data of interest (S2 weir data) and the time period of interest (2017 - 2021)
#create file path to call the data from Box 
## Mia's file path 
filepath <- "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/Hydro/Streamflow/L0_subdaily/ManualChecks"

#read in the manual checks data 
mc <- read_csv(here(filepath, "2017-2021_S2Stage.csv"))

#clean the data
mc_clean <- mc %>% 
  clean_names() %>% 
  #remove S2 lagging pool data and keep only the S2 weir data
  subset(name == "S2 WEIR") %>%
  mutate(collected = as.POSIXct(collected, format = '%m/%d/%Y %H:%M', 
                                tz = "GMT"),
         year = format(as.POSIXct(collected, format = '%Y-%m-%d %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y'),
         date = format(as.POSIXct(collected, format = '%Y-%m-%d %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         diff = point_gage - stripchart_stage
         ) %>% 
  subset(year >= min_year & year <= max_year) 

#create subsets of the data where NA values are removed from a col of interest
#point check data 
pc_clean <- mc_clean %>% 
  #remove rows with NA values in the point_gage col 
  subset(!is.na(point_gage))

#stripchart check data 
sc_clean <- mc_clean %>% 
  #remove rows with NA values in the stripchart_stage col 
  subset(!is.na(stripchart_stage))

#format data for plotting 
mc_plot <- mc_clean %>% 
  select(-logger_stage,
         -year,
         -date,
         -site,
         -name) %>% 
  pivot_longer(cols = c("point_gage", "stripchart_stage"),
               names_to = "type", 
               values_to = "stream_height_ft")
  • Plot the manual checks data
#sample size
n <- nrow(mc_plot)

ggplot(data = mc_plot) + 
  geom_point(aes(x = collected, 
                 y = stream_height_ft,
                 color = type),
             alpha = 0.5) +  
  theme_classic() + 
  labs(y = "Stream Height (ft)", 
       x = "Time", 
       title = paste0("S2 Manual Checkpoints VS \n Stripchart Values Over Time (", min_year, "-", max_year, ")"),
       color = "Checkpoint Type",
       caption = paste0("n = ", n)
       ) + 
  theme(plot.title = element_text(hjust = 0.5)) + 
  scale_color_discrete(labels = c("Point Gage", "Stripchart Stage"))

  • Plot the differences over time, where the difference is calculated as point_gage - stripchart_stage
#sample size
n <- nrow(mc_plot)

ggplot(data = mc_plot) + 
  geom_point(aes(x = collected, y = diff)) + 
  geom_smooth(aes(x = collected, y = diff), method = "lm") +  
  theme_classic() + 
  labs(y = "Stream Height (ft)", 
       x = "Point Gage - Stripchart Stage", 
       title = paste0("S2 Manual Checkpoints VS \n Stripchart Differences Over Time (", min_year, "-", max_year, ")"), 
       caption = paste0("n = ", n)
       ) + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5)) + 
  scale_color_discrete(labels = c("Point Gage", "Stripchart Stage"))

Streamflow + Manual Checks (2017 - 2021)

  • Plot stripchart data (as lines) and manual checkpoints (as points)

For interactive plots,

  • stripchart stream height (ft) is plotted as a black line, and

  • manual checkpoints of stream height (ft) are plotted as red dots.

#plot
p_all <- ggplot() +
  #stripchart streamflow data
  geom_line(data = all_streamflow,
            aes(x = datetime, 
                y = stream_height_ft,
                group = 1,
                #create hovering labels for interactive graph 
                text = paste0("DateTime: ", datetime, "\n",
                              "Stream Height (ft): ", round(stream_height_ft, digits = 3))),
            size = 0.25) + 
  #manual checkpoints 
  geom_point(data = mc_clean, 
             aes(x = collected, 
                 y = point_gage,
                 group = 1,
                 #create hovering labels for interactive graph 
                 text = paste0("DateTime: ", collected, "\n",
                              "Stream Height Checkpoint (ft): ", round(stripchart_stage, digits = 3))), 
             color = "red",
             size = 0.5) + 
  theme_classic() + 
  labs(x = "Time", 
       y = "Stream Height (ft)",
       title = paste0("S2 Bog Stream Height (", min_year, "-", max_year, ")"
       ),
       subtitle = "Stripchart data are plotted as black lines. Manual checks are plotted as red dots.") + 
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5))

#static plot
#p_all

#interactive plot 
ggplotly(p_all, tooltip = "text")

1:1 Stripchart vs Manual Checkpoints Comparison (2017 - 2021)

Next, we are interested in a 1:1 comparison of stripchart data vs manual checkpoints data at the exact same timestamp. Since the stripchart data and manual checkpoints do not align perfectly, we will interpolate the stripchart stream flow values using the na.approx() function from the zoo package to estimate stripchart values at the time of the manual checks.

Stripchart vs Stripchart Stage

  • Clean the data

  • Note that the manual checks data only range from 2017-04-04 to 2019-12-31, which is a smaller time range than the stripchart data

#clean the stripchart_stage data (using the point_gage col)
sc_sub <- sc_clean %>% 
  #extract the data (without the timestamp)
  mutate(date = format(as.POSIXct(collected, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT"),
         year = as.numeric(year)
         ) %>% 
  #rename column
  rename(datetime = collected) %>% 
  #remove unnecessary columns 
  select(-site, -lab_id, -name, -point_gage, -logger_stage, -diff) 

#identify date ranges of interest: 
##aka the range of the manual checkpoint data
max_date <- max(sc_sub$date)
min_date <- min(sc_sub$date)

#subset stripchart data to fit within the time range of the manual checkpoints
streamflow_sub <- all_streamflow %>% 
  mutate(
    year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
                             tz = "GMT"),
                  format = '%Y'), 
    year = as.numeric(year)
    ) %>% 
  subset(date <= max_date & date >= min_date) 
#create dummy dataset (which is a subset of the data)
test <- full_join(x = sc_sub, 
                y = streamflow_sub, 
                by = c("datetime", "date", "year")) %>% 
  arrange(datetime)

x <- test[13:17, ]

#approximate linearly 
y <- x %>% 
  mutate(approx = na.approx(stream_height_ft, 
                            na.rm =FALSE, 
                            method = "linear", 
                            maxgap = 6)
         )

# #plot
# ggplot(data = y, aes(x = datetime, y = approx)) + 
#   geom_line() + 
#   geom_point()

z <- full_join(x = sc_sub, 
                y = streamflow_sub, 
                by = c("datetime", "date", "year")) %>% 
  arrange(datetime) %>% 
  mutate(approx = na.approx(stream_height_ft, 
                            na.rm =FALSE, 
                            method = "linear", 
                            maxgap = 6)
         )
  • Join the manual checkpoints (sc_sub) dataset with the continuous stripchart dataset (streamflow_sub)

  • Optional: Plot the data to visually examine if the approximated values generated by na.approx() look correct

#join the stripchart data and manual checks data 
fj_sc <- full_join(x = sc_sub, 
                y = streamflow_sub, 
                by = c("datetime", "date", "year"))  %>%
  #order by datetime
  arrange(datetime) %>% 
  #interpolate to fill in missing NA values
  mutate(approx = na.approx(stream_height_ft,
                            na.rm = FALSE,
                            #x = datetime,
                            #xout = datetime,
                            method = "linear",
                            maxgap = 6),
         #calculate the difference between approximated values and real values
         diff1 = approx - stream_height_ft, #should be zero 
         diff2 = approx - stripchart_stage,
         diff = coalesce(diff1, diff2)) 

#test the full join did not drop any observations 
if(nrow(fj_sc) != (nrow(sc_sub) + nrow(streamflow_sub))) stop("Check fj_sc dataframe dimensions")

#test that diff1 = 0 at all times 
x <- fj_sc[!is.na(fj_sc$diff1),]
if(sum(x$diff1) != 0) stop("approx - stream_height_ft should be 0")

#test that diff2 != 0 
y <- fj_sc[!is.na(fj_sc$diff2),]
if(sum(y$diff2) == 0) stop("approx - stripchart_stage should not be 0")

# fj_sc_long <- fj_sc %>% 
#   #rearrange dataframe into a long format
#   pivot_longer(cols = c("stripchart_stage", "stream_height_ft", "approx"),
#                names_to = "types",
#                values_to = "stream_height_ft")

##visually examine the approximated values 
# ggplot(data = fj_sc) +
#   geom_line(aes(x = datetime, y = approx))
# 
# ggplot(data = fj_sc) +
#   geom_point(aes(x = datetime,
#                 y = stream_height_ft,
#                 color = types))
# 
# ggplot(data = fj_sc) +
#   geom_point(aes(x = datetime,
#                 y = stream_height_ft,
#                 color = types)) +
#   geom_point(aes(x = datetime,
#                 y = approx),
#              alpha = 0.05)


# fj_sc_wide <- fj_sc %>%
#   #remove extraneous columns 
#   select(-stream_height_ft, -diff) %>% 
#   #return to wide format to create scatterplot
#   pivot_wider(
#     names_from = "types",
#     values_from = "approx"
#   ) %>% 
#   #unlist the columns created by pivot_wider() 
#   unnest()


# #visually examine all data 
# ggplot(data = fj_sc) +
#   geom_point(aes(x = manual_check,
#                  y = stripchart))
  • Subset the joined dataframe to include only the timestamps of interest (AKA the timestamps from the original stripchart_stage dataset)

  • Note there will be some values that could not be approximated due to lack of stripchart data

#left join the joined data and stripchart_stage data to keep only the timestamps of interest 
lj_sc <- left_join(x = sc_sub, 
                y = fj_sc,
                by = c("datetime", "year", "stripchart_stage", "date")) %>% 
  #ensure the correct column types
  mutate(stripchart_stage = as.numeric(stripchart_stage),
         stream_height_ft = as.numeric(stream_height_ft),
         approx = as.numeric(approx)
  ) %>% 
  #remove duplicates
  unique()

#test if the subsetted data has the same number of observations as the manual checks dataframe
#if(nrow(lj) != (nrow(pc_sub))) stop("Check lj dataframe dimensions")

#test if the approximated data differs greatly from the stripchart/manual checks data 
#if(abs(lj$diff) > 0.01 ) stop("Check differences")
  • Plot
#sample size
n <- nrow(lj_sc)

#plot 
p_1to1 <- ggplot() + 
  geom_point(data = lj_sc, 
             aes(x = stripchart_stage, #from manual checkpoints dataset
                 y = approx, #continuous stripchart values 
                 group = 1,
                 #create hovering labels for interactive graph 
                 text = paste0("Stripchart Stage (ft): ", stripchart_stage, "\n",
                              "Interpolated Values (ft): ", round(approx, digits = 3))),
             alpha = 0.4) + 
  theme_classic() +
  labs(x = "Stripchart Stage",
       y = "Interpolated Values",
       title = paste0("S2 Stripchart vs Interpolated Values (", min_year, "-", max_year, ")"),
       subtitle = "The line y = x is plotted for reference.") +
    theme(plot.title = element_text(hjust = 0.5),
          plot.subtitle = element_text(hjust = 0.5)) + 
  geom_abline(slope = 1, intercept = 0)

#static plot
#p_1to1

#interactive plot 
ggplotly(p_1to1, tooltip = "text") %>% 
  layout(
    title = list(text = paste0("S2 Stripchart Stage vs Interpolated Values (", min_year, "-", max_year, ")",
                                    '<br>',
                                    '<sup>',
                                     #subtitle
                                     paste0("n = ", n), 
                                    '</sup>')), 
         #title size 
         font = list(size = 14))

Stripchart vs Point Gage Manual Checkpoints

#clean the point gage data 
pc_sub <- pc_clean %>% 
  #extract the data (without the timestamp)
  mutate(date = format(as.POSIXct(collected, format = '%m/%d/%Y %H:%M:%S', 
                                  tz = "GMT"), 
                       format = '%Y-%m-%d'),
         date = as.POSIXct(date, tz = "GMT"),
         year = as.numeric(year)
         ) %>% 
  #rename column
  rename(datetime = collected) %>% 
  #remove unnecessary columns 
  select(-site, 
         -lab_id, 
         -name, 
         -stripchart_stage, 
         -logger_stage,
         -diff)
  

#identify date ranges of interest: 2017-04-04 to 2019-12-31
##aka the range of the manual checkpoint data
max_date <- max(pc_sub$date)
min_date <- min(pc_sub$date)
nrows <- nrow(pc_sub)

# #subset stripchart data to fit within the time range of the manual checkpoints
# streamflow_sub <- all_streamflow %>% 
#   mutate(
#     year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', 
#                              tz = "GMT"),
#                   format = '%Y'), 
#     year = as.numeric(year)
#     ) %>% 
#   subset(date <= max_date & date >= min_date) 
  • Join the manual checks dataset with the stripchart dataset

  • Plot the data to visually examine if the approximated values generated by na.approx() look correct

#join the stripchart data and manual checks data 
fj_pc <- full_join(x = pc_sub, 
                y = streamflow_sub, 
                by = c("datetime", "date", "year")) %>% 
  #rename columns 
  rename(manual_check = point_gage,
         stripchart = stream_height_ft) %>% 
  #interpolate to fill in missing NA values
  mutate(approx = na.approx(object = stripchart,
                            x = datetime,
                            method = "linear",
                            maxgap = 6))

# #rearrange dataframe into a long format
#   pivot_longer(cols = c("manual_check", "stripchart"),
#                names_to = "types",
#                values_to = "stream_height_ft") %>%

# #visually examine the approximated values
# ggplot(data = fj_pc) +
#   geom_line(aes(x = datetime, y = approx))
# 
# ggplot(data = fj_pc) +
#   geom_point(aes(x = datetime,
#                 y = stream_height_ft,
#                 color = types))
# 
# ggplot(data = fj_pc) +
#   geom_point(aes(x = datetime,
#                 y = stream_height_ft,
#                 color = types)) +
#   geom_point(aes(x = datetime,
#                 y = approx),
#              alpha = 0.05)


fj_pc <- fj_pc %>%
  #remove extraneous columns 
  select(-stripchart) 
# %>% 
#   #return to wide format to create scatterplot
#   pivot_wider(
#     names_from = "types",
#     values_from = "approx"
#   ) %>% 
#   #unlist the columns created by pivot_wider() 
#   unnest


# #visually examine all data
# ggplot(data = fj_pc) +
#   geom_point(aes(x = manual_check,
#                  y = stripchart))
  • Subset the joined dataframe to include only the timestamps of interest (AKA the timestamps from the original manual checks dataset)

  • Plot

#left join the joined data and manual checks data to keep only the timestamps of interest 
lj_pc <- left_join(x = pc_sub, 
                y = fj_pc,
                by = c("datetime", "year", "date", "point_gage" = "manual_check")) %>% 
  #ensure the correct column types
  mutate(point_gage = as.numeric(point_gage),
         approx = as.numeric(approx),
         diff = point_gage - approx)

#test if the subsetted data has the same number of observations as the manual checks dataframe
#if(nrow(lj_pc) != (nrow(pc_sub))) stop("Check lj dataframe dimensions")

#test if the approximated data differs greatly from the stripchart/manual checks data 
#if(abs(lj_pc$diff) > 0.01 ) stop("Check differences")
#plot 
p_1to1 <- ggplot() + 
  geom_point(data = lj_pc, 
             aes(x = point_gage, 
                 y = approx, 
                 group = 1,
                 #create hovering labels for interactive graph 
                 text = paste0("Manual Checkpoint (ft): ", point_gage, "\n",
                              "Stripchart (ft): ", approx)),
             alpha = 0.5) + 
  theme_classic() +
  labs(x = "Manual Checkpoints",
       y = "Stripchart Data",
       title = paste0("S2 Manual Checks vs Interpolated Stripchart Values (", min_year, "-", max_year, ")"),
       subtitle = "The line y = x is plotted for reference.") +
    theme(plot.title = element_text(hjust = 0.5),
          plot.subtitle = element_text(hjust = 0.5)) + 
  geom_abline(slope = 1, intercept = 0) +
  xlim(0, 0.4) +
  ylim(0, 0.4)

#static plot
#p_1to1

#interactive plot 
ggplotly(p_1to1, tooltip = "text")

Run this code chunk to knit to HTML without using the knit button in RStudio

  • Save as index.html to render GitHub Page

Code Accessibility

Note that this file is being knit as index.html into the manual_checks repository in order to update the GitHub pages website, where the plots can be viewed online.